-
Notifications
You must be signed in to change notification settings - Fork 13.3k
SelectionDAG: Support nofpclass(nan/qnan/snan/nzero) in arguments #130051
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-loongarch Author: YunQiang Su (wzssyqa) Changes: SelectionDAGISel::LowerArguments: Pass NoNaN flags to InVals.
Thus, we can use them in isKnownNeverNaN. Patch is 56.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130051.diff 6 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 20283ad8f2689..52021f88d5243 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -397,7 +397,7 @@ struct SDNodeFlags {
Exact = 1 << 2,
Disjoint = 1 << 3,
NonNeg = 1 << 4,
- NoNaNs = 1 << 5,
+ // 1 << 5 was used as NoNaNs
NoInfs = 1 << 6,
NoSignedZeros = 1 << 7,
AllowReciprocal = 1 << 8,
@@ -416,11 +416,14 @@ struct SDNodeFlags {
// Compare instructions which may carry the samesign flag.
SameSign = 1 << 14,
+ NoSNaNs = 1 << 15,
+ NoQNaNs = 1 << 16,
+
// NOTE: Please update LargestValue in LLVM_DECLARE_ENUM_AS_BITMASK below
// the class definition when adding new flags.
PoisonGeneratingFlags = NoUnsignedWrap | NoSignedWrap | Exact | Disjoint |
- NonNeg | NoNaNs | NoInfs | SameSign,
+ NonNeg | NoSNaNs | NoQNaNs | NoInfs | SameSign,
};
/// Default constructor turns off all optimization flags.
@@ -428,7 +431,8 @@ struct SDNodeFlags {
/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
- setNoNaNs(FPMO.hasNoNaNs());
+ setNoSNaNs(FPMO.hasNoNaNs());
+ setNoQNaNs(FPMO.hasNoNaNs());
setNoInfs(FPMO.hasNoInfs());
setNoSignedZeros(FPMO.hasNoSignedZeros());
setAllowReciprocal(FPMO.hasAllowReciprocal());
@@ -444,7 +448,9 @@ struct SDNodeFlags {
void setDisjoint(bool b) { setFlag<Disjoint>(b); }
void setSameSign(bool b) { setFlag<SameSign>(b); }
void setNonNeg(bool b) { setFlag<NonNeg>(b); }
- void setNoNaNs(bool b) { setFlag<NoNaNs>(b); }
+ void setNoNaNs(bool b) { setFlag<NoSNaNs>(b); setFlag<NoQNaNs>(b); }
+ void setNoSNaNs(bool b) { setFlag<NoSNaNs>(b); }
+ void setNoQNaNs(bool b) { setFlag<NoQNaNs>(b); }
void setNoInfs(bool b) { setFlag<NoInfs>(b); }
void setNoSignedZeros(bool b) { setFlag<NoSignedZeros>(b); }
void setAllowReciprocal(bool b) { setFlag<AllowReciprocal>(b); }
@@ -461,7 +467,9 @@ struct SDNodeFlags {
bool hasDisjoint() const { return Flags & Disjoint; }
bool hasSameSign() const { return Flags & SameSign; }
bool hasNonNeg() const { return Flags & NonNeg; }
- bool hasNoNaNs() const { return Flags & NoNaNs; }
+ bool hasNoNaNs() const { return (Flags & NoSNaNs) && (Flags & NoQNaNs); }
+ bool hasNoSNaNs() const { return Flags & NoSNaNs; }
+ bool hasNoQNaNs() const { return Flags & NoQNaNs; }
bool hasNoInfs() const { return Flags & NoInfs; }
bool hasNoSignedZeros() const { return Flags & NoSignedZeros; }
bool hasAllowReciprocal() const { return Flags & AllowReciprocal; }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index df30148b78b65..7fc97fadeff09 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5613,7 +5613,12 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
// If we're told that NaNs won't happen, assume they won't.
- if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
+ if (getTarget().Options.NoNaNsFPMath)
+ return true;
+ SDNodeFlags OpFlags = Op->getFlags();
+ if (SNaN && OpFlags.hasNoSNaNs())
+ return true;
+ if (OpFlags.hasNoSNaNs() && OpFlags.hasNoQNaNs())
return true;
if (Depth >= MaxRecursionDepth)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 86b99a5210924..0604db4c64608 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -11885,6 +11885,16 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
AssertOp = ISD::AssertZext;
+ if (Arg.hasAttribute(Attribute::NoFPClass)) {
+ SDNodeFlags InValFlags = InVals[i]->getFlags();
+ bool NoSNaN = ((Arg.getNoFPClass() & llvm::fcSNan) == llvm::fcSNan);
+ bool NoQNaN = ((Arg.getNoFPClass() & llvm::fcQNan) == llvm::fcQNan);
+ InValFlags.setNoSNaNs(NoSNaN);
+ InValFlags.setNoQNaNs(NoQNaN);
+ InValFlags.setNoInfs((Arg.getNoFPClass() & llvm::fcInf) ==
+ llvm::fcInf);
+ InVals[i]->setFlags(InValFlags);
+ }
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr, NewRoot,
diff --git a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
index bb3f9a3e52a16..fd2bc238ae93e 100644
--- a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
+++ b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=aarch64 --mattr=+fullfp16 < %s | FileCheck %s --check-prefix=AARCH64
+; FIXME: nofpclass with length more than 128bit, may emit unnecessary instructions.
;;;;;;;;;;;;;;;; max_f64
define double @max_nnan_f64(double %a, double %b) {
@@ -140,17 +141,6 @@ entry:
ret <8 x float> %c
}
-;;;;;;;;;;;;;;;;;; max_f16
-define half @max_nnan_f16(half %a, half %b) {
-; AARCH64-LABEL: max_nnan_f16:
-; AARCH64: // %bb.0: // %entry
-; AARCH64-NEXT: fmaxnm h0, h0, h1
-; AARCH64-NEXT: ret
-entry:
- %c = call nnan half @llvm.maximumnum.f16(half %a, half %b)
- ret half %c
-}
-
define <2 x half> @max_nnan_v2f16(<2 x half> %a, <2 x half> %b) {
; AARCH64-LABEL: max_nnan_v2f16:
; AARCH64: // %bb.0: // %entry
@@ -292,17 +282,6 @@ entry:
ret <4 x double> %c
}
-;;;;;;;;;;;;;;;;;; min_f32
-define float @min_nnan_f32(float %a, float %b) {
-; AARCH64-LABEL: min_nnan_f32:
-; AARCH64: // %bb.0: // %entry
-; AARCH64-NEXT: fminnm s0, s0, s1
-; AARCH64-NEXT: ret
-entry:
- %c = call nnan float @llvm.minimumnum.f32(float %a, float %b)
- ret float %c
-}
-
define <2 x float> @min_nnan_v2f32(<2 x float> %a, <2 x float> %b) {
; AARCH64-LABEL: min_nnan_v2f32:
; AARCH64: // %bb.0: // %entry
@@ -1032,3 +1011,977 @@ entry:
%c = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %a, <16 x half> %b)
ret <16 x half> %c
}
+;;;;;;;;;;;;;;;; max_f64
+define double @max_nofpclass_f64(double nofpclass(nan) %a, double nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm d0, d0, d1
+; AARCH64-NEXT: ret
+entry:
+ %c = call double @llvm.maximumnum.f64(double %a, double %b)
+ ret double %c
+}
+
+define <2 x double> @max_nofpclass_v2f64(<2 x double> nofpclass(nan) %a, <2 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v2f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; AARCH64-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %c
+}
+
+define <3 x double> @max_nofpclass_v3f64(<3 x double> nofpclass(nan) %a, <3 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v3f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: // kill: def $d3 killed $d3 def $q3
+; AARCH64-NEXT: // kill: def $d0 killed $d0 def $q0
+; AARCH64-NEXT: // kill: def $d4 killed $d4 def $q4
+; AARCH64-NEXT: // kill: def $d1 killed $d1 def $q1
+; AARCH64-NEXT: // kill: def $d2 killed $d2 def $q2
+; AARCH64-NEXT: // kill: def $d5 killed $d5 def $q5
+; AARCH64-NEXT: mov v0.d[1], v1.d[0]
+; AARCH64-NEXT: mov v3.d[1], v4.d[0]
+; AARCH64-NEXT: fminnm v2.2d, v2.2d, v2.2d
+; AARCH64-NEXT: fminnm v1.2d, v3.2d, v3.2d
+; AARCH64-NEXT: fminnm v0.2d, v0.2d, v0.2d
+; AARCH64-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; AARCH64-NEXT: fminnm v1.2d, v5.2d, v5.2d
+; AARCH64-NEXT: fmaxnm v2.2d, v2.2d, v1.2d
+; AARCH64-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; AARCH64-NEXT: // kill: def $d0 killed $d0 killed $q0
+; AARCH64-NEXT: // kill: def $d1 killed $d1 killed $q1
+; AARCH64-NEXT: // kill: def $d2 killed $d2 killed $q2
+; AARCH64-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.maximumnum.v3f64(<3 x double> %a, <3 x double> %b)
+ ret <3 x double> %c
+}
+
+define <4 x double> @max_nofpclass_v4f64(<4 x double> nofpclass(nan) %a, <4 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v4f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v3.2d, v3.2d, v3.2d
+; AARCH64-NEXT: fminnm v1.2d, v1.2d, v1.2d
+; AARCH64-NEXT: fmaxnm v0.2d, v0.2d, v2.2d
+; AARCH64-NEXT: fmaxnm v1.2d, v1.2d, v3.2d
+; AARCH64-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.maximumnum.v4f64(<4 x double> %a, <4 x double> %b)
+ ret <4 x double> %c
+}
+
+;;;;;;;;;;;;;;;;;; max_f32
+define float @max_nofpclass_f32(float nofpclass(nan) %a, float nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm s0, s0, s1
+; AARCH64-NEXT: ret
+entry:
+ %c = call float @llvm.maximumnum.f32(float %a, float %b)
+ ret float %c
+}
+
+define <2 x float> @max_nofpclass_v2f32(<2 x float> nofpclass(nan) %a, <2 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v2f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.2s, v0.2s, v1.2s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> %a, <2 x float> %b)
+ ret <2 x float> %c
+}
+
+define <3 x float> @max_nofpclass_v3f32(<3 x float> nofpclass(nan) %a, <3 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v3f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> %a, <3 x float> %b)
+ ret <3 x float> %c
+}
+
+define <4 x float> @max_nofpclass_v4f32(<4 x float> nofpclass(nan) %a, <4 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v4f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %c
+}
+
+define <5 x float> @max_nofpclass_v5f32(<5 x float> nofpclass(nan) %a, <5 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v5f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: // kill: def $s0 killed $s0 def $q0
+; AARCH64-NEXT: // kill: def $s5 killed $s5 def $q5
+; AARCH64-NEXT: // kill: def $s1 killed $s1 def $q1
+; AARCH64-NEXT: // kill: def $s6 killed $s6 def $q6
+; AARCH64-NEXT: // kill: def $s2 killed $s2 def $q2
+; AARCH64-NEXT: // kill: def $s7 killed $s7 def $q7
+; AARCH64-NEXT: // kill: def $s3 killed $s3 def $q3
+; AARCH64-NEXT: mov x8, sp
+; AARCH64-NEXT: // kill: def $s4 killed $s4 def $q4
+; AARCH64-NEXT: mov v0.s[1], v1.s[0]
+; AARCH64-NEXT: mov v5.s[1], v6.s[0]
+; AARCH64-NEXT: mov v0.s[2], v2.s[0]
+; AARCH64-NEXT: mov v5.s[2], v7.s[0]
+; AARCH64-NEXT: ldr s2, [sp, #8]
+; AARCH64-NEXT: fminnm v2.4s, v2.4s, v2.4s
+; AARCH64-NEXT: mov v0.s[3], v3.s[0]
+; AARCH64-NEXT: ld1 { v5.s }[3], [x8]
+; AARCH64-NEXT: fminnm v3.4s, v4.4s, v4.4s
+; AARCH64-NEXT: fminnm v1.4s, v5.4s, v5.4s
+; AARCH64-NEXT: fminnm v0.4s, v0.4s, v0.4s
+; AARCH64-NEXT: fmaxnm v4.4s, v3.4s, v2.4s
+; AARCH64-NEXT: // kill: def $s4 killed $s4 killed $q4
+; AARCH64-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT: mov s1, v0.s[1]
+; AARCH64-NEXT: mov s2, v0.s[2]
+; AARCH64-NEXT: mov s3, v0.s[3]
+; AARCH64-NEXT: // kill: def $s0 killed $s0 killed $q0
+; AARCH64-NEXT: ret
+entry:
+ %c = call <5 x float> @llvm.maximumnum.v5f32(<5 x float> %a, <5 x float> %b)
+ ret <5 x float> %c
+}
+
+define <8 x float> @max_nofpclass_v8f32(<8 x float> nofpclass(nan) %a, <8 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v8f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v3.4s, v3.4s, v3.4s
+; AARCH64-NEXT: fminnm v1.4s, v1.4s, v1.4s
+; AARCH64-NEXT: fmaxnm v0.4s, v0.4s, v2.4s
+; AARCH64-NEXT: fmaxnm v1.4s, v1.4s, v3.4s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> %a, <8 x float> %b)
+ ret <8 x float> %c
+}
+
+define <2 x half> @max_nofpclass_v2f16(<2 x half> nofpclass(nan) %a, <2 x half> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v2f16:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
+; AARCH64-NEXT: ret
+entry:
+ %c = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %a, <2 x half> %b)
+ ret <2 x half> %c
+}
+
+define <4 x half> @max_nofpclass_v4f16(<4 x half> nofpclass(nan) %a, <4 x half> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v4f16:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
+; AARCH64-NEXT: ret
+entry:
+ %c = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %a, <4 x half> %b)
+ ret <4 x half> %c
+}
+
+define <8 x half> @max_nofpclass_v8f16(<8 x half> nofpclass(nan) %a, <8 x half> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v8f16:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
+; AARCH64-NEXT: ret
+entry:
+ %c = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %a, <8 x half> %b)
+ ret <8 x half> %c
+}
+
+define <9 x half> @max_nofpclass_v9f16(<9 x half> nofpclass(nan) %a, <9 x half> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v9f16:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: // kill: def $h0 killed $h0 def $q0
+; AARCH64-NEXT: // kill: def $h1 killed $h1 def $q1
+; AARCH64-NEXT: // kill: def $h2 killed $h2 def $q2
+; AARCH64-NEXT: add x9, sp, #16
+; AARCH64-NEXT: // kill: def $h3 killed $h3 def $q3
+; AARCH64-NEXT: // kill: def $h4 killed $h4 def $q4
+; AARCH64-NEXT: // kill: def $h5 killed $h5 def $q5
+; AARCH64-NEXT: // kill: def $h6 killed $h6 def $q6
+; AARCH64-NEXT: // kill: def $h7 killed $h7 def $q7
+; AARCH64-NEXT: mov v0.h[1], v1.h[0]
+; AARCH64-NEXT: ldr h1, [sp, #8]
+; AARCH64-NEXT: ld1 { v1.h }[1], [x9]
+; AARCH64-NEXT: add x9, sp, #24
+; AARCH64-NEXT: mov v0.h[2], v2.h[0]
+; AARCH64-NEXT: ldr h2, [sp]
+; AARCH64-NEXT: ld1 { v1.h }[2], [x9]
+; AARCH64-NEXT: add x9, sp, #32
+; AARCH64-NEXT: fminnm v2.8h, v2.8h, v2.8h
+; AARCH64-NEXT: mov v0.h[3], v3.h[0]
+; AARCH64-NEXT: ld1 { v1.h }[3], [x9]
+; AARCH64-NEXT: add x9, sp, #40
+; AARCH64-NEXT: ldr h3, [sp, #72]
+; AARCH64-NEXT: ld1 { v1.h }[4], [x9]
+; AARCH64-NEXT: add x9, sp, #48
+; AARCH64-NEXT: fminnm v3.8h, v3.8h, v3.8h
+; AARCH64-NEXT: mov v0.h[4], v4.h[0]
+; AARCH64-NEXT: ld1 { v1.h }[5], [x9]
+; AARCH64-NEXT: add x9, sp, #56
+; AARCH64-NEXT: fmaxnm v2.8h, v2.8h, v3.8h
+; AARCH64-NEXT: mov v0.h[5], v5.h[0]
+; AARCH64-NEXT: ld1 { v1.h }[6], [x9]
+; AARCH64-NEXT: add x9, sp, #64
+; AARCH64-NEXT: str h2, [x8, #16]
+; AARCH64-NEXT: mov v0.h[6], v6.h[0]
+; AARCH64-NEXT: ld1 { v1.h }[7], [x9]
+; AARCH64-NEXT: fminnm v1.8h, v1.8h, v1.8h
+; AARCH64-NEXT: mov v0.h[7], v7.h[0]
+; AARCH64-NEXT: fminnm v0.8h, v0.8h, v0.8h
+; AARCH64-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
+; AARCH64-NEXT: str q0, [x8]
+; AARCH64-NEXT: ret
+entry:
+ %c = call <9 x half> @llvm.maximumnum.v9f16(<9 x half> %a, <9 x half> %b)
+ ret <9 x half> %c
+}
+
+define <16 x half> @max_nofpclass_v16f16(<16 x half> nofpclass(nan) %a, <16 x half> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v16f16:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v3.8h, v3.8h, v3.8h
+; AARCH64-NEXT: fminnm v1.8h, v1.8h, v1.8h
+; AARCH64-NEXT: fmaxnm v0.8h, v0.8h, v2.8h
+; AARCH64-NEXT: fmaxnm v1.8h, v1.8h, v3.8h
+; AARCH64-NEXT: ret
+entry:
+ %c = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %a, <16 x half> %b)
+ ret <16 x half> %c
+}
+
+;;;;;;;;;;;;;;;; min_f64
+define double @min_nofpclass_f64(double nofpclass(nan) %a, double nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm d0, d0, d1
+; AARCH64-NEXT: ret
+entry:
+ %c = call double @llvm.minimumnum.f64(double %a, double %b)
+ ret double %c
+}
+
+define <2 x double> @min_nofpclass_v2f64(<2 x double> nofpclass(nan) %a, <2 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v2f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v0.2d, v0.2d, v1.2d
+; AARCH64-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %c
+}
+
+define <3 x double> @min_nofpclass_v3f64(<3 x double> nofpclass(nan) %a, <3 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v3f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: // kill: def $d3 killed $d3 def $q3
+; AARCH64-NEXT: // kill: def $d0 killed $d0 def $q0
+; AARCH64-NEXT: // kill: def $d4 killed $d4 def $q4
+; AARCH64-NEXT: // kill: def $d1 killed $d1 def $q1
+; AARCH64-NEXT: // kill: def $d2 killed $d2 def $q2
+; AARCH64-NEXT: // kill: def $d5 killed $d5 def $q5
+; AARCH64-NEXT: mov v0.d[1], v1.d[0]
+; AARCH64-NEXT: mov v3.d[1], v4.d[0]
+; AARCH64-NEXT: fminnm v2.2d, v2.2d, v2.2d
+; AARCH64-NEXT: fminnm v1.2d, v3.2d, v3.2d
+; AARCH64-NEXT: fminnm v0.2d, v0.2d, v0.2d
+; AARCH64-NEXT: fminnm v0.2d, v0.2d, v1.2d
+; AARCH64-NEXT: fminnm v1.2d, v5.2d, v5.2d
+; AARCH64-NEXT: fminnm v2.2d, v2.2d, v1.2d
+; AARCH64-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; AARCH64-NEXT: // kill: def $d0 killed $d0 killed $q0
+; AARCH64-NEXT: // kill: def $d1 killed $d1 killed $q1
+; AARCH64-NEXT: // kill: def $d2 killed $d2 killed $q2
+; AARCH64-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.minimumnum.v3f64(<3 x double> %a, <3 x double> %b)
+ ret <3 x double> %c
+}
+
+define <4 x double> @min_nofpclass_v4f64(<4 x double> nofpclass(nan) %a, <4 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v4f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v3.2d, v3.2d, v3.2d
+; AARCH64-NEXT: fminnm v1.2d, v1.2d, v1.2d
+; AARCH64-NEXT: fminnm v0.2d, v0.2d, v2.2d
+; AARCH64-NEXT: fminnm v1.2d, v1.2d, v3.2d
+; AARCH64-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> %a, <4 x double> %b)
+ ret <4 x double> %c
+}
+
+define <2 x float> @min_nofpclass_v2f32(<2 x float> nofpclass(nan) %a, <2 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v2f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v0.2s, v0.2s, v1.2s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> %a, <2 x float> %b)
+ ret <2 x float> %c
+}
+
+define <3 x float> @min_nofpclass_v3f32(<3 x float> nofpclass(nan) %a, <3 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v3f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> %a, <3 x float> %b)
+ ret <3 x float> %c
+}
+
+define <4 x float> @min_nofpclass_v4f32(<4 x float> nofpclass(nan) %a, <4 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v4f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %c
+}
+
+define <5 x float> @min_nofpclass_v5f32(<5 x float> nofpclass(nan) %a, <5 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v5f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: // kill: def $s0 killed $s0 def $q0
+; AARCH64-NEXT: // kill: def $s5 killed $s5 def $q5
+; AARCH64-NEXT: // kill: def $s1 killed $s1 def $q1
+; AARCH64-NEXT: // kill: def $s6 killed $s6 def $q6
+; AARCH64-NEXT: // kill: def $s2 killed $s2 def $q2
+; AARCH64-NEXT: // kill: def $s7 killed $s7 def $q7
+; AARCH64-NEXT: // kill: def $s3 killed $s3 def $q3
+; AARCH64-NEXT: mov x8, sp
+; AARCH64-NEXT: // kill: def $s4 killed $s4 def $q4
+; AARCH64-NE...
[truncated]
|
@llvm/pr-subscribers-llvm-selectiondag Author: YunQiang Su (wzssyqa) Changes: SelectionDAGISel::LowerArguments: Pass NoNaN flags to InVals.
Thus, we can use them in isKnownNeverNaN. Patch is 56.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130051.diff 6 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 20283ad8f2689..52021f88d5243 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -397,7 +397,7 @@ struct SDNodeFlags {
Exact = 1 << 2,
Disjoint = 1 << 3,
NonNeg = 1 << 4,
- NoNaNs = 1 << 5,
+ // 1 << 5 was used as NoNaNs
NoInfs = 1 << 6,
NoSignedZeros = 1 << 7,
AllowReciprocal = 1 << 8,
@@ -416,11 +416,14 @@ struct SDNodeFlags {
// Compare instructions which may carry the samesign flag.
SameSign = 1 << 14,
+ NoSNaNs = 1 << 15,
+ NoQNaNs = 1 << 16,
+
// NOTE: Please update LargestValue in LLVM_DECLARE_ENUM_AS_BITMASK below
// the class definition when adding new flags.
PoisonGeneratingFlags = NoUnsignedWrap | NoSignedWrap | Exact | Disjoint |
- NonNeg | NoNaNs | NoInfs | SameSign,
+ NonNeg | NoSNaNs | NoQNaNs | NoInfs | SameSign,
};
/// Default constructor turns off all optimization flags.
@@ -428,7 +431,8 @@ struct SDNodeFlags {
/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
- setNoNaNs(FPMO.hasNoNaNs());
+ setNoSNaNs(FPMO.hasNoNaNs());
+ setNoQNaNs(FPMO.hasNoNaNs());
setNoInfs(FPMO.hasNoInfs());
setNoSignedZeros(FPMO.hasNoSignedZeros());
setAllowReciprocal(FPMO.hasAllowReciprocal());
@@ -444,7 +448,9 @@ struct SDNodeFlags {
void setDisjoint(bool b) { setFlag<Disjoint>(b); }
void setSameSign(bool b) { setFlag<SameSign>(b); }
void setNonNeg(bool b) { setFlag<NonNeg>(b); }
- void setNoNaNs(bool b) { setFlag<NoNaNs>(b); }
+ void setNoNaNs(bool b) { setFlag<NoSNaNs>(b); setFlag<NoQNaNs>(b); }
+ void setNoSNaNs(bool b) { setFlag<NoSNaNs>(b); }
+ void setNoQNaNs(bool b) { setFlag<NoQNaNs>(b); }
void setNoInfs(bool b) { setFlag<NoInfs>(b); }
void setNoSignedZeros(bool b) { setFlag<NoSignedZeros>(b); }
void setAllowReciprocal(bool b) { setFlag<AllowReciprocal>(b); }
@@ -461,7 +467,9 @@ struct SDNodeFlags {
bool hasDisjoint() const { return Flags & Disjoint; }
bool hasSameSign() const { return Flags & SameSign; }
bool hasNonNeg() const { return Flags & NonNeg; }
- bool hasNoNaNs() const { return Flags & NoNaNs; }
+ bool hasNoNaNs() const { return (Flags & NoSNaNs) && (Flags & NoQNaNs); }
+ bool hasNoSNaNs() const { return Flags & NoSNaNs; }
+ bool hasNoQNaNs() const { return Flags & NoQNaNs; }
bool hasNoInfs() const { return Flags & NoInfs; }
bool hasNoSignedZeros() const { return Flags & NoSignedZeros; }
bool hasAllowReciprocal() const { return Flags & AllowReciprocal; }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index df30148b78b65..7fc97fadeff09 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5613,7 +5613,12 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
// If we're told that NaNs won't happen, assume they won't.
- if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
+ if (getTarget().Options.NoNaNsFPMath)
+ return true;
+ SDNodeFlags OpFlags = Op->getFlags();
+ if (SNaN && OpFlags.hasNoSNaNs())
+ return true;
+ if (OpFlags.hasNoSNaNs() && OpFlags.hasNoQNaNs())
return true;
if (Depth >= MaxRecursionDepth)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 86b99a5210924..0604db4c64608 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -11885,6 +11885,16 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
AssertOp = ISD::AssertZext;
+ if (Arg.hasAttribute(Attribute::NoFPClass)) {
+ SDNodeFlags InValFlags = InVals[i]->getFlags();
+ bool NoSNaN = ((Arg.getNoFPClass() & llvm::fcSNan) == llvm::fcSNan);
+ bool NoQNaN = ((Arg.getNoFPClass() & llvm::fcQNan) == llvm::fcQNan);
+ InValFlags.setNoSNaNs(NoSNaN);
+ InValFlags.setNoQNaNs(NoQNaN);
+ InValFlags.setNoInfs((Arg.getNoFPClass() & llvm::fcInf) ==
+ llvm::fcInf);
+ InVals[i]->setFlags(InValFlags);
+ }
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr, NewRoot,
diff --git a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
index bb3f9a3e52a16..fd2bc238ae93e 100644
--- a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
+++ b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=aarch64 --mattr=+fullfp16 < %s | FileCheck %s --check-prefix=AARCH64
+; FIXME: nofpclass with length more than 128bit, may emit unnecessary instructions.
;;;;;;;;;;;;;;;; max_f64
define double @max_nnan_f64(double %a, double %b) {
@@ -140,17 +141,6 @@ entry:
ret <8 x float> %c
}
-;;;;;;;;;;;;;;;;;; max_f16
-define half @max_nnan_f16(half %a, half %b) {
-; AARCH64-LABEL: max_nnan_f16:
-; AARCH64: // %bb.0: // %entry
-; AARCH64-NEXT: fmaxnm h0, h0, h1
-; AARCH64-NEXT: ret
-entry:
- %c = call nnan half @llvm.maximumnum.f16(half %a, half %b)
- ret half %c
-}
-
define <2 x half> @max_nnan_v2f16(<2 x half> %a, <2 x half> %b) {
; AARCH64-LABEL: max_nnan_v2f16:
; AARCH64: // %bb.0: // %entry
@@ -292,17 +282,6 @@ entry:
ret <4 x double> %c
}
-;;;;;;;;;;;;;;;;;; min_f32
-define float @min_nnan_f32(float %a, float %b) {
-; AARCH64-LABEL: min_nnan_f32:
-; AARCH64: // %bb.0: // %entry
-; AARCH64-NEXT: fminnm s0, s0, s1
-; AARCH64-NEXT: ret
-entry:
- %c = call nnan float @llvm.minimumnum.f32(float %a, float %b)
- ret float %c
-}
-
define <2 x float> @min_nnan_v2f32(<2 x float> %a, <2 x float> %b) {
; AARCH64-LABEL: min_nnan_v2f32:
; AARCH64: // %bb.0: // %entry
@@ -1032,3 +1011,977 @@ entry:
%c = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %a, <16 x half> %b)
ret <16 x half> %c
}
+;;;;;;;;;;;;;;;; max_f64
+define double @max_nofpclass_f64(double nofpclass(nan) %a, double nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm d0, d0, d1
+; AARCH64-NEXT: ret
+entry:
+ %c = call double @llvm.maximumnum.f64(double %a, double %b)
+ ret double %c
+}
+
+define <2 x double> @max_nofpclass_v2f64(<2 x double> nofpclass(nan) %a, <2 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v2f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; AARCH64-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %c
+}
+
+define <3 x double> @max_nofpclass_v3f64(<3 x double> nofpclass(nan) %a, <3 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v3f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: // kill: def $d3 killed $d3 def $q3
+; AARCH64-NEXT: // kill: def $d0 killed $d0 def $q0
+; AARCH64-NEXT: // kill: def $d4 killed $d4 def $q4
+; AARCH64-NEXT: // kill: def $d1 killed $d1 def $q1
+; AARCH64-NEXT: // kill: def $d2 killed $d2 def $q2
+; AARCH64-NEXT: // kill: def $d5 killed $d5 def $q5
+; AARCH64-NEXT: mov v0.d[1], v1.d[0]
+; AARCH64-NEXT: mov v3.d[1], v4.d[0]
+; AARCH64-NEXT: fminnm v2.2d, v2.2d, v2.2d
+; AARCH64-NEXT: fminnm v1.2d, v3.2d, v3.2d
+; AARCH64-NEXT: fminnm v0.2d, v0.2d, v0.2d
+; AARCH64-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; AARCH64-NEXT: fminnm v1.2d, v5.2d, v5.2d
+; AARCH64-NEXT: fmaxnm v2.2d, v2.2d, v1.2d
+; AARCH64-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; AARCH64-NEXT: // kill: def $d0 killed $d0 killed $q0
+; AARCH64-NEXT: // kill: def $d1 killed $d1 killed $q1
+; AARCH64-NEXT: // kill: def $d2 killed $d2 killed $q2
+; AARCH64-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.maximumnum.v3f64(<3 x double> %a, <3 x double> %b)
+ ret <3 x double> %c
+}
+
+define <4 x double> @max_nofpclass_v4f64(<4 x double> nofpclass(nan) %a, <4 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v4f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v3.2d, v3.2d, v3.2d
+; AARCH64-NEXT: fminnm v1.2d, v1.2d, v1.2d
+; AARCH64-NEXT: fmaxnm v0.2d, v0.2d, v2.2d
+; AARCH64-NEXT: fmaxnm v1.2d, v1.2d, v3.2d
+; AARCH64-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.maximumnum.v4f64(<4 x double> %a, <4 x double> %b)
+ ret <4 x double> %c
+}
+
+;;;;;;;;;;;;;;;;;; max_f32
+define float @max_nofpclass_f32(float nofpclass(nan) %a, float nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm s0, s0, s1
+; AARCH64-NEXT: ret
+entry:
+ %c = call float @llvm.maximumnum.f32(float %a, float %b)
+ ret float %c
+}
+
+define <2 x float> @max_nofpclass_v2f32(<2 x float> nofpclass(nan) %a, <2 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v2f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.2s, v0.2s, v1.2s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> %a, <2 x float> %b)
+ ret <2 x float> %c
+}
+
+define <3 x float> @max_nofpclass_v3f32(<3 x float> nofpclass(nan) %a, <3 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v3f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> %a, <3 x float> %b)
+ ret <3 x float> %c
+}
+
+define <4 x float> @max_nofpclass_v4f32(<4 x float> nofpclass(nan) %a, <4 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v4f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %c
+}
+
+define <5 x float> @max_nofpclass_v5f32(<5 x float> nofpclass(nan) %a, <5 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v5f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: // kill: def $s0 killed $s0 def $q0
+; AARCH64-NEXT: // kill: def $s5 killed $s5 def $q5
+; AARCH64-NEXT: // kill: def $s1 killed $s1 def $q1
+; AARCH64-NEXT: // kill: def $s6 killed $s6 def $q6
+; AARCH64-NEXT: // kill: def $s2 killed $s2 def $q2
+; AARCH64-NEXT: // kill: def $s7 killed $s7 def $q7
+; AARCH64-NEXT: // kill: def $s3 killed $s3 def $q3
+; AARCH64-NEXT: mov x8, sp
+; AARCH64-NEXT: // kill: def $s4 killed $s4 def $q4
+; AARCH64-NEXT: mov v0.s[1], v1.s[0]
+; AARCH64-NEXT: mov v5.s[1], v6.s[0]
+; AARCH64-NEXT: mov v0.s[2], v2.s[0]
+; AARCH64-NEXT: mov v5.s[2], v7.s[0]
+; AARCH64-NEXT: ldr s2, [sp, #8]
+; AARCH64-NEXT: fminnm v2.4s, v2.4s, v2.4s
+; AARCH64-NEXT: mov v0.s[3], v3.s[0]
+; AARCH64-NEXT: ld1 { v5.s }[3], [x8]
+; AARCH64-NEXT: fminnm v3.4s, v4.4s, v4.4s
+; AARCH64-NEXT: fminnm v1.4s, v5.4s, v5.4s
+; AARCH64-NEXT: fminnm v0.4s, v0.4s, v0.4s
+; AARCH64-NEXT: fmaxnm v4.4s, v3.4s, v2.4s
+; AARCH64-NEXT: // kill: def $s4 killed $s4 killed $q4
+; AARCH64-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT: mov s1, v0.s[1]
+; AARCH64-NEXT: mov s2, v0.s[2]
+; AARCH64-NEXT: mov s3, v0.s[3]
+; AARCH64-NEXT: // kill: def $s0 killed $s0 killed $q0
+; AARCH64-NEXT: ret
+entry:
+ %c = call <5 x float> @llvm.maximumnum.v5f32(<5 x float> %a, <5 x float> %b)
+ ret <5 x float> %c
+}
+
+define <8 x float> @max_nofpclass_v8f32(<8 x float> nofpclass(nan) %a, <8 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v8f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v3.4s, v3.4s, v3.4s
+; AARCH64-NEXT: fminnm v1.4s, v1.4s, v1.4s
+; AARCH64-NEXT: fmaxnm v0.4s, v0.4s, v2.4s
+; AARCH64-NEXT: fmaxnm v1.4s, v1.4s, v3.4s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> %a, <8 x float> %b)
+ ret <8 x float> %c
+}
+
+define <2 x half> @max_nofpclass_v2f16(<2 x half> nofpclass(nan) %a, <2 x half> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v2f16:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
+; AARCH64-NEXT: ret
+entry:
+ %c = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %a, <2 x half> %b)
+ ret <2 x half> %c
+}
+
+define <4 x half> @max_nofpclass_v4f16(<4 x half> nofpclass(nan) %a, <4 x half> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v4f16:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
+; AARCH64-NEXT: ret
+entry:
+ %c = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %a, <4 x half> %b)
+ ret <4 x half> %c
+}
+
+define <8 x half> @max_nofpclass_v8f16(<8 x half> nofpclass(nan) %a, <8 x half> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v8f16:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
+; AARCH64-NEXT: ret
+entry:
+ %c = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %a, <8 x half> %b)
+ ret <8 x half> %c
+}
+
+define <9 x half> @max_nofpclass_v9f16(<9 x half> nofpclass(nan) %a, <9 x half> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v9f16:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: // kill: def $h0 killed $h0 def $q0
+; AARCH64-NEXT: // kill: def $h1 killed $h1 def $q1
+; AARCH64-NEXT: // kill: def $h2 killed $h2 def $q2
+; AARCH64-NEXT: add x9, sp, #16
+; AARCH64-NEXT: // kill: def $h3 killed $h3 def $q3
+; AARCH64-NEXT: // kill: def $h4 killed $h4 def $q4
+; AARCH64-NEXT: // kill: def $h5 killed $h5 def $q5
+; AARCH64-NEXT: // kill: def $h6 killed $h6 def $q6
+; AARCH64-NEXT: // kill: def $h7 killed $h7 def $q7
+; AARCH64-NEXT: mov v0.h[1], v1.h[0]
+; AARCH64-NEXT: ldr h1, [sp, #8]
+; AARCH64-NEXT: ld1 { v1.h }[1], [x9]
+; AARCH64-NEXT: add x9, sp, #24
+; AARCH64-NEXT: mov v0.h[2], v2.h[0]
+; AARCH64-NEXT: ldr h2, [sp]
+; AARCH64-NEXT: ld1 { v1.h }[2], [x9]
+; AARCH64-NEXT: add x9, sp, #32
+; AARCH64-NEXT: fminnm v2.8h, v2.8h, v2.8h
+; AARCH64-NEXT: mov v0.h[3], v3.h[0]
+; AARCH64-NEXT: ld1 { v1.h }[3], [x9]
+; AARCH64-NEXT: add x9, sp, #40
+; AARCH64-NEXT: ldr h3, [sp, #72]
+; AARCH64-NEXT: ld1 { v1.h }[4], [x9]
+; AARCH64-NEXT: add x9, sp, #48
+; AARCH64-NEXT: fminnm v3.8h, v3.8h, v3.8h
+; AARCH64-NEXT: mov v0.h[4], v4.h[0]
+; AARCH64-NEXT: ld1 { v1.h }[5], [x9]
+; AARCH64-NEXT: add x9, sp, #56
+; AARCH64-NEXT: fmaxnm v2.8h, v2.8h, v3.8h
+; AARCH64-NEXT: mov v0.h[5], v5.h[0]
+; AARCH64-NEXT: ld1 { v1.h }[6], [x9]
+; AARCH64-NEXT: add x9, sp, #64
+; AARCH64-NEXT: str h2, [x8, #16]
+; AARCH64-NEXT: mov v0.h[6], v6.h[0]
+; AARCH64-NEXT: ld1 { v1.h }[7], [x9]
+; AARCH64-NEXT: fminnm v1.8h, v1.8h, v1.8h
+; AARCH64-NEXT: mov v0.h[7], v7.h[0]
+; AARCH64-NEXT: fminnm v0.8h, v0.8h, v0.8h
+; AARCH64-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
+; AARCH64-NEXT: str q0, [x8]
+; AARCH64-NEXT: ret
+entry:
+ %c = call <9 x half> @llvm.maximumnum.v9f16(<9 x half> %a, <9 x half> %b)
+ ret <9 x half> %c
+}
+
+define <16 x half> @max_nofpclass_v16f16(<16 x half> nofpclass(nan) %a, <16 x half> nofpclass(nan) %b) {
+; AARCH64-LABEL: max_nofpclass_v16f16:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v3.8h, v3.8h, v3.8h
+; AARCH64-NEXT: fminnm v1.8h, v1.8h, v1.8h
+; AARCH64-NEXT: fmaxnm v0.8h, v0.8h, v2.8h
+; AARCH64-NEXT: fmaxnm v1.8h, v1.8h, v3.8h
+; AARCH64-NEXT: ret
+entry:
+ %c = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %a, <16 x half> %b)
+ ret <16 x half> %c
+}
+
+;;;;;;;;;;;;;;;; min_f64
+define double @min_nofpclass_f64(double nofpclass(nan) %a, double nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm d0, d0, d1
+; AARCH64-NEXT: ret
+entry:
+ %c = call double @llvm.minimumnum.f64(double %a, double %b)
+ ret double %c
+}
+
+define <2 x double> @min_nofpclass_v2f64(<2 x double> nofpclass(nan) %a, <2 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v2f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v0.2d, v0.2d, v1.2d
+; AARCH64-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %c
+}
+
+define <3 x double> @min_nofpclass_v3f64(<3 x double> nofpclass(nan) %a, <3 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v3f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: // kill: def $d3 killed $d3 def $q3
+; AARCH64-NEXT: // kill: def $d0 killed $d0 def $q0
+; AARCH64-NEXT: // kill: def $d4 killed $d4 def $q4
+; AARCH64-NEXT: // kill: def $d1 killed $d1 def $q1
+; AARCH64-NEXT: // kill: def $d2 killed $d2 def $q2
+; AARCH64-NEXT: // kill: def $d5 killed $d5 def $q5
+; AARCH64-NEXT: mov v0.d[1], v1.d[0]
+; AARCH64-NEXT: mov v3.d[1], v4.d[0]
+; AARCH64-NEXT: fminnm v2.2d, v2.2d, v2.2d
+; AARCH64-NEXT: fminnm v1.2d, v3.2d, v3.2d
+; AARCH64-NEXT: fminnm v0.2d, v0.2d, v0.2d
+; AARCH64-NEXT: fminnm v0.2d, v0.2d, v1.2d
+; AARCH64-NEXT: fminnm v1.2d, v5.2d, v5.2d
+; AARCH64-NEXT: fminnm v2.2d, v2.2d, v1.2d
+; AARCH64-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; AARCH64-NEXT: // kill: def $d0 killed $d0 killed $q0
+; AARCH64-NEXT: // kill: def $d1 killed $d1 killed $q1
+; AARCH64-NEXT: // kill: def $d2 killed $d2 killed $q2
+; AARCH64-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.minimumnum.v3f64(<3 x double> %a, <3 x double> %b)
+ ret <3 x double> %c
+}
+
+define <4 x double> @min_nofpclass_v4f64(<4 x double> nofpclass(nan) %a, <4 x double> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v4f64:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v3.2d, v3.2d, v3.2d
+; AARCH64-NEXT: fminnm v1.2d, v1.2d, v1.2d
+; AARCH64-NEXT: fminnm v0.2d, v0.2d, v2.2d
+; AARCH64-NEXT: fminnm v1.2d, v1.2d, v3.2d
+; AARCH64-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> %a, <4 x double> %b)
+ ret <4 x double> %c
+}
+
+define <2 x float> @min_nofpclass_v2f32(<2 x float> nofpclass(nan) %a, <2 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v2f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v0.2s, v0.2s, v1.2s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> %a, <2 x float> %b)
+ ret <2 x float> %c
+}
+
+define <3 x float> @min_nofpclass_v3f32(<3 x float> nofpclass(nan) %a, <3 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v3f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> %a, <3 x float> %b)
+ ret <3 x float> %c
+}
+
+define <4 x float> @min_nofpclass_v4f32(<4 x float> nofpclass(nan) %a, <4 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v4f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; AARCH64-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %c
+}
+
+define <5 x float> @min_nofpclass_v5f32(<5 x float> nofpclass(nan) %a, <5 x float> nofpclass(nan) %b) {
+; AARCH64-LABEL: min_nofpclass_v5f32:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: // kill: def $s0 killed $s0 def $q0
+; AARCH64-NEXT: // kill: def $s5 killed $s5 def $q5
+; AARCH64-NEXT: // kill: def $s1 killed $s1 def $q1
+; AARCH64-NEXT: // kill: def $s6 killed $s6 def $q6
+; AARCH64-NEXT: // kill: def $s2 killed $s2 def $q2
+; AARCH64-NEXT: // kill: def $s7 killed $s7 def $q7
+; AARCH64-NEXT: // kill: def $s3 killed $s3 def $q3
+; AARCH64-NEXT: mov x8, sp
+; AARCH64-NEXT: // kill: def $s4 killed $s4 def $q4
+; AARCH64-NE...
[truncated]
|
af140e1
to
7020dbc
Compare
32f4676
to
b35e701
Compare
@arsenm ping |
2ac0759
to
d9e047f
Compare
SelectionDAGISel::LowerArguments: Pass NoNaN Flags to InVals. `nofpclass` supports the values nan, snan, and qnan, where nan = snan|qnan. So let's use NoSNaNs and NoQNaNs in SDNodeFlags. Thus, we can use them in isKnownNeverNaN. Support ISD::AssertFPNoClass. Fix AssertFPNoClass.
94fbe22
to
81b9064
Compare
@@ -397,7 +397,7 @@ struct SDNodeFlags { | |||
Exact = 1 << 2, | |||
Disjoint = 1 << 3, | |||
NonNeg = 1 << 4, | |||
NoNaNs = 1 << 5, | |||
// 1 << 5 was used as NoNaNs |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unrelated change, shouldn't touch the fast math flags
@@ -859,6 +859,7 @@ def SDT_assert : SDTypeProfile<1, 1, | |||
[SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>; | |||
def assertsext : SDNode<"ISD::AssertSext", SDT_assert>; | |||
def assertzext : SDNode<"ISD::AssertZext", SDT_assert>; | |||
def assernofpclass : SDNode<"ISD::AssertNoFPClass", SDTFPUnaryOp>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo inherited from the other patch
SDNodeFlags OutValFlags = OutVal->getFlags(); | ||
bool NoSNaN = ((Arg.getNoFPClass() & llvm::fcSNan) == llvm::fcSNan); | ||
bool NoQNaN = ((Arg.getNoFPClass() & llvm::fcQNan) == llvm::fcQNan); | ||
bool NoInf = ((Arg.getNoFPClass() & llvm::fcInf) == llvm::fcInf); | ||
bool NoNegZero = | ||
((Arg.getNoFPClass() & llvm::fcInf) == llvm::fcNegZero); | ||
OutValFlags.setNoSNaNs(NoSNaN); | ||
OutValFlags.setNoQNaNs(NoQNaN); | ||
OutValFlags.setNoInfs(NoInf); | ||
OutValFlags.setNoSignedZeros(NoNegZero); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should not be setting any fast math flags. Particularly NSZ, the meaning of the NSZ flag is not that there isn't a -0 value
%z = call float @llvm.minimumnum.f32(float %x, float %y) | ||
ret float %z | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should also have some vector cases. Also, is nofpclass supported for uniform aggregate structs now? The rules for FPMathOperator were relaxed, so does the verifier complain if you use `nofpclass` on a `{float, float}`?
SDNodeFlags OpFlags = Op->getFlags(); | ||
if (SNaN && OpFlags.hasNoSNaNs()) | ||
return true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't need to touch the flags. This code should be directly inspecting AssertNoFPClass's carried mask
SDNodeFlags OutValFlags = OutVal->getFlags(); | ||
bool NoSNaN = ((Arg.getNoFPClass() & llvm::fcSNan) == llvm::fcSNan); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
AssertNoFPClass should have an explicit integer mask argument that directly copies the value. You shouldn't be trying to re-encode that value as fast math flags
SelectionDAGISel::LowerArguments: Pass NoNaN Flags to InVals.
`nofpclass` supports the values nan, snan, and qnan, where nan = snan|qnan. So let's use NoSNaNs and NoQNaNs in SDNodeFlags. Thus, we can use them in isKnownNeverNaN.